/* Clear the LOG and OUTPUT windows of the interactive SAS session. */
DM 'CLEAR LOG; CLEAR OUTPUT;';
/* Library holding the duration data sets read and written below.
   The path is a placeholder and must be filled in before running.
   NOTE(review): the libref "rev" used in the TABLE 6 section is not assigned
   anywhere in this file - confirm where it is supposed to be defined. */
libname duration 'path to be completed path to duration data sets';
/* Set the output title to a blank string. */
title '                   ';

/******************************************************************/
/* Compute Frequency of Wage agreements (without NMW)***************************/
/******************************************************************/
/*duree_accords_br : contains observed durations between two industry-level agreements*/
/*main variables:
D23: sector
SIREN: firm identifier
YP: number of employees
ann_acc: year of agreement
code_dept: local departement
mois_acc: month of agreement
t_ents_yp: size class*/

/*duree_acc_ents : contains observed durations between two firm-level agreements*/
/*main variables:
D23: sector
SIREN: firm identifier
YP: number of employees
ann_acc: year of agreement
code_dept: local departement
mois_acc: month of agreement
t_ents_yp: size class*/

/*duree_acc_debfin : contains the dates of the first and last observation of each sample firm*/
/*main variables:
D23: sector
SIREN: firm identifier
YP: number of employees
code_dept: local departement
t_ents_yp: size class
indic_deb: flag (tested as =1 below) marking the date of the first observation of the firm in the sample
indic_fin: flag (tested as =1 below) marking the date of the last observation of the firm in the sample
*/ 

/* Industry-level wage agreements: every record is flagged indic_acc_br = 1. */
data duree_accords_br_f;
    set duration.duree_accords_br;
    drop an l_an l_mois;
    indic_acc_br = 1;
run;

/* Firm-level wage agreements: every record is flagged indic_acc_e = 1. */
data duree_acc_ents_f;
    set duration.duree_acc_ents;
    drop an l_an l_mois;
    indic_acc_e = 1;
run;

/* First and last observation of each firm: these records are not agreements,
   so indic_acc is preset to 0. */
data duree_acc_debfin_f;
    set duration.duree_acc_debfin;
    drop an l_an l_mois;
    indic_acc = 0;
run;


/* Stack the three data sets (industry-level agreements, firm-level agreements,
   first/last observations) and discard records whose firm identifier is blank. */
data duree_accords_br_ents_f;
    set duree_accords_br_f
        duree_acc_ents_f
        duree_acc_debfin_f;
    drop an l_an l_mois;
    if siren = '         ' then delete;
run;


proc sort data=duree_accords_br_ents_f;
    by siren ann_acc mois_acc;
run;

/* Build the censoring dummies and the overall agreement dummy, then keep only
   the variables used downstream:
     indic_cg = 1 when indic_deb = 1 (left censoring), 0 otherwise
     indic_cd = 1 when indic_fin = 1 (right censoring), 0 otherwise
     indic_acc = 1 when there is an industry-level or a firm-level agreement
   Kept variables: sector (D23), firm identifier (SIREN), year of agreement
   (ann_acc), month of agreement (mois_acc), geographical location (code_dept),
   censoring dummies, size class (t_ents_yp), number of employees (yp) and the
   three agreement dummies. */
data duree_accords_br_ents_f;
    set duree_accords_br_ents_f;

    /* A comparison evaluates to 1 or 0, and to 0 when the flag is missing. */
    indic_cd = (indic_fin = 1);
    indic_cg = (indic_deb = 1);

    /* Records coming from another source have a missing level flag. */
    if indic_acc_br = . then indic_acc_br = 0;
    if indic_acc_e = . then indic_acc_e = 0;

    /* The two conditions are mutually exclusive; any other combination leaves
       indic_acc as read from the input. */
    if indic_acc_e = 1 or indic_acc_br = 1 then indic_acc = 1;
    else if indic_acc_e = 0 and indic_acc_br = 0 then indic_acc = 0;

    keep D23 SIREN ann_acc code_dept indic_cd indic_cg mois_acc t_ents_yp yp
         indic_acc_br indic_acc_e indic_acc;
run;


/* Collapse to one record per firm and date (siren, ann_acc, mois_acc).
   NODUPKEYS alone keeps only the first record of each key, so when a firm has
   several records for the same month (e.g. an industry-level and a firm-level
   agreement, or an agreement on the date flagged as first/last observation),
   the flags carried by the discarded records would be lost. The group-wise
   maximum of every dummy is therefore computed first and written back onto
   the retained record. Descriptive variables (D23, code_dept, t_ents_yp, yp)
   still come from the first record of each key, as before. */
proc sort data=duree_accords_br_ents_f;
    by siren ann_acc mois_acc;
run;

proc means data=duree_accords_br_ents_f noprint;
    by siren ann_acc mois_acc;
    var indic_cd indic_cg indic_acc_br indic_acc_e indic_acc;
    /* MAX= without names stores each maximum under the analysis variable's name. */
    output out=_acc_flags_max (drop=_type_ _freq_) max=;
run;

proc sort data=duree_accords_br_ents_f nodupkeys;
    by siren ann_acc mois_acc;
run;

/* One-to-one merge: the flag maxima (right-most data set) overwrite the flags
   of the retained record. The date variables are renamed to an / mois. */
data duree_accords_br_ents_f;
    merge duree_accords_br_ents_f _acc_flags_max;
    by siren ann_acc mois_acc;
    rename ann_acc = an
           mois_acc = mois;
run;


/* duration.duree_accords: all firms coming from the FIBEN data set (with and
   without agreements), at a yearly frequency. Only the identifying and
   descriptive variables are kept. */
data freq_accords;
    set duration.duree_accords;
    keep D23 SIREN an code_dept t_ents_yp yp;
run;

proc sort data=freq_accords;
    by siren an;
run;

/* Expand every yearly record into twelve monthly records (mois = 1..12).
   A single DO loop with an explicit OUTPUT replaces twelve copy-pasted DATA
   steps and the stacking step; after the sort below the resulting data set is
   the same. The intermediate data sets freq_essai1-freq_essai12 are no longer
   created. */
data freq_accords_tot;
    set freq_accords;
    do mois = 1 to 12;
        output;
    end;
run;

proc sort data=freq_accords_tot;
    by siren an mois;
run;

/* Attach the dated agreement records to the monthly panel of firm
   observations. Only rows present in the monthly panel are kept. */
data duration.freq_accords_totb;
    merge freq_accords_tot (in=in_panel)
          duree_accords_br_ents_f;
    by siren an mois;
    if in_panel;
run;

/* Months without a matching agreement record have missing dummies:
   set them to 0. */
data duration.freq_accords_totb;
    set duration.freq_accords_totb;
    array dummies {*} indic_cd indic_cg indic_acc_br indic_acc_e indic_acc;
    do _k = 1 to dim(dummies);
        if dummies{_k} = . then dummies{_k} = 0;
    end;
    drop _k;
run;
/************* The data set contains all firms observations at a monthly frequency + a dummy variable equal to one when there is an industry or a firm-level agreements 
at a given date (month+year)********/


/* Employee-weighted (yp) frequency of wage agreements, without the minimum
   wage. One-way tables, in this order:
     indic_acc    - any agreement (industry-level or firm-level)
     indic_acc_e  - firm-level agreements
     indic_acc_br - industry-level agreements */
proc freq data=duration.freq_accords_totb;
    weight yp;
    tables indic_acc indic_acc_e indic_acc_br;
run;

/******************************************************************/
/* Compute Frequency of Wage agreements (WITH NMW)                */
/******************************************************************/

/* Industry-level wage agreements: every record is flagged indic_acc_br = 1. */
data duree_accords_br_f;
    set duration.duree_accords_br;
    drop an l_an l_mois;
    indic_acc_br = 1;
run;

/* Firm-level wage agreements: every record is flagged indic_acc_e = 1. */
data duree_acc_ents_f;
    set duration.duree_acc_ents;
    drop an l_an l_mois;
    indic_acc_e = 1;
run;

/* First and last observation of each firm: not agreements, so indic_acc is
   preset to 0. */
data duree_acc_debfin_f;
    set duration.duree_acc_debfin;
    drop an l_an l_mois;
    indic_acc = 0;
run;

/* Dates of NMW (national minimum wage) changes: every record is flagged
   indic_acc_s = 1. */
data duree_accord_s_f;
    set duration.duree_accord_smic;
    drop an l_an l_mois;
    indic_acc_s = 1;
run;


/* Stack the four data sets (industry-level agreements, firm-level agreements,
   NMW changes, first/last observations) and discard records whose firm
   identifier is blank. */
data duree_accords_br_ents_s_f;
    set duree_accords_br_f
        duree_acc_ents_f
        duree_accord_s_f
        duree_acc_debfin_f;
    drop an l_an l_mois;
    if siren = '         ' then delete;
run;

/* Define censoring (left or right) and the dummy indic_acc, equal to one when
   there is a wage agreement at the industry level, at the firm level, or an
   NMW change. */
proc sort data=duree_accords_br_ents_s_f;
    by siren ann_acc mois_acc;
run;

data duree_accords_br_ents_s_f;
    set duree_accords_br_ents_s_f;

    /* indic_cg: left censoring (first observation of the firm);
       indic_cd: right censoring (last observation of the firm). */
    indic_cd = 0;
    indic_cg = 0;
    if indic_deb = 1 then indic_cg = 1;
    if indic_fin = 1 then indic_cd = 1;

    /* Records coming from another source have a missing flag. indic_acc_s is
       defaulted like the other two: a missing value never equals 0, so without
       this line the "all three flags are 0" test below could never be true and
       indic_acc = 0 relied entirely on the value preset upstream. */
    if indic_acc_br = . then indic_acc_br = 0;
    if indic_acc_e = . then indic_acc_e = 0;
    if indic_acc_s = . then indic_acc_s = 0;

    if indic_acc_e = 0 and indic_acc_br = 0 and indic_acc_s = 0 then indic_acc = 0;
    else if indic_acc_e = 1 or indic_acc_br = 1 or indic_acc_s = 1 then indic_acc = 1;
run;

/* Keep only useful variables: sector (D23), firm identifier (SIREN), year of
   agreement (ann_acc), month of agreement (mois_acc), geographical location
   (code_dept), left and right censoring dummies, size class (t_ents_yp),
   number of employees (yp), and the dummies for industry-level, firm-level,
   NMW and any agreement. */
data duree_accords_br_ents_s_f;
    set duree_accords_br_ents_s_f;
    keep D23 SIREN ann_acc code_dept indic_cd indic_cg mois_acc t_ents_yp yp
         indic_acc_br indic_acc_e indic_acc_s indic_acc;
run;

/* Collapse to one record per firm and date (siren, ann_acc, mois_acc).
   NODUPKEYS alone keeps only the first record of each key, so when several
   events fall in the same month (e.g. a firm-level agreement and an NMW
   change) the flags carried by the discarded records would be lost. The
   group-wise maximum of every dummy is computed first and written back onto
   the retained record; descriptive variables still come from the first record
   of each key. */
proc sort data=duree_accords_br_ents_s_f;
    by siren ann_acc mois_acc;
run;

proc means data=duree_accords_br_ents_s_f noprint;
    by siren ann_acc mois_acc;
    var indic_cd indic_cg indic_acc_br indic_acc_e indic_acc_s indic_acc;
    /* MAX= without names stores each maximum under the analysis variable's name. */
    output out=_acc_s_flags_max (drop=_type_ _freq_) max=;
run;

proc sort data=duree_accords_br_ents_s_f nodupkeys;
    by siren ann_acc mois_acc;
run;

/* One-to-one merge: the flag maxima (right-most data set) overwrite the flags
   of the retained record. The date variables are renamed to an / mois. */
data duree_accords_br_ents_s_f;
    merge duree_accords_br_ents_s_f _acc_s_flags_max;
    by siren ann_acc mois_acc;
    rename ann_acc = an
           mois_acc = mois;
run;


/* Attach the dated agreement / NMW records to the monthly panel of firm
   observations. Both inputs are sorted on the merge key first; only rows
   present in the monthly panel are kept. */
proc sort data=freq_accords_tot;
    by siren an mois;
run;

proc sort data=duree_accords_br_ents_s_f;
    by siren an mois;
run;

data freq_accords_tot_s;
    merge freq_accords_tot (in=in_panel)
          duree_accords_br_ents_s_f;
    by siren an mois;
    if in_panel;
run;

/* Months without a matching record have missing dummies: set them to 0.
   indic_acc_s is not handled in this step. */
data freq_accords_tot_s;
    set freq_accords_tot_s;
    array dummies {*} indic_cd indic_cg indic_acc_br indic_acc_e indic_acc;
    do _k = 1 to dim(dummies);
        if dummies{_k} = . then dummies{_k} = 0;
    end;
    drop _k;
run;



/****************************************************************/
/* We duplicate the whole data set, creating two data sets:
   - one accounting for minimum wage earners: it contains the proportion of NMW earners in a given industry
     at a given date, associated with every firm. We then calculate the number of employees in a given firm covered by the NMW;
   - one accounting for other workers: it contains the proportion of other workers (100 - % of NMW earners) in a given industry
     at a given date, associated with every firm. We then calculate the number of employees in a given firm not covered by the NMW.

Then we stack the two data sets and compute the frequency of wage agreements including the NMW, weighted by the number of employees. */

/* duration.duree_accord_smic1: for each firm*year, the % of minimum wage
   earners in its industry * departement (geographical location). The year
   variable is renamed to an so that it matches the panel. */
data freq_accord_smic1;
    set duration.duree_accord_smic1;
    rename ann_acc = an;
run;

proc sort data=freq_accord_smic1;
    by siren an;
run;

proc sort data=freq_accords_tot_s;
    by siren an;
run;

/* Merge the NMW-earner shares onto the monthly agreement panel by firm and
   year, and compute the "theoretical number" of employees concerned by the
   NMW increase: number of employees (yp) * share p_smic2 / 100.
   Rows without an NMW flag get indic_acc_s = 0. */
data freq_accords_tot_sb;
    merge freq_accords_tot_s
          freq_accord_smic1;
    by siren an;
    yp_smic = yp * p_smic2 / 100;
    if indic_acc_s = . then indic_acc_s = 0;
run;

/* duration.duree_accord_smic0: for each firm*year, the % of other workers
   (not minimum wage earners) in its industry * departement (geographical
   location). The year variable is renamed to an so that it matches the panel. */
data freq_accord_smic0;
    set duration.duree_accord_smic0;
    rename ann_acc = an;
run;

proc sort data=freq_accord_smic0;
    by siren an;
run;

proc sort data=freq_accords_tot_s;
    by siren an;
run;

/* Merge the other-worker shares onto the monthly agreement panel by firm and
   year, and compute the "theoretical number" of employees not covered by the
   NMW increase: number of employees (yp) * share p_smic2 / 100.
   NOTE(review): the formula is the same as for NMW earners, so p_smic2 in this
   data set presumably already holds (100 - % of NMW earners) - confirm against
   duration.duree_accord_smic0.
   Rows without an NMW flag get indic_acc_s = 0. */
data freq_accords_totc;
    merge freq_accords_tot_s
          freq_accord_smic0;
    by siren an;
    yp_smic = yp * p_smic2 / 100;
    if indic_acc_s = . then indic_acc_s = 0;
run;

/* Stack the two data sets: other workers first, then NMW earners. */
data duration.freq_accords_tot_s10;
    set freq_accords_totc
        freq_accords_tot_sb;
run;

proc sort data=duration.freq_accords_tot_s10;
    by siren an mois;
run;

/**********************************************/
/* Compute the frequency of wage agreements   */
/* TABLE 1 and 2                              */
/**********************************************/

/* TABLE 1 line 3: frequency of indic_acc weighted by yp_smic. */
proc freq data=duration.freq_accords_tot_s10;
    weight yp_smic;
    tables indic_acc;
run;

/* TABLE 2 first column, BY industry (D23) */
proc sort data=duration.freq_accords_tot_s10 out=test_secteur;
    by d23;
run;

/* Weighted (yp_smic) distribution of indic_acc within each sector. */
proc freq data=test_secteur noprint;
    tables indic_acc / out=freq_secteur;
    by d23;
    weight yp_smic;
run;

/* Sector weight: mean of yp_smic within each sector. */
proc means data=test_secteur noprint;
    var yp_smic;
    by d23;
    output out=poids_secteur mean=poids_sect;
run;

/* Keep one row per sector holding the frequency of indic_acc = 1.
   A sector with no agreement at all has a single row (indic_acc = 0,
   percent = 100); it is turned into an indic_acc = 1 row with a frequency
   of 0. Both assignments sit in one DO block: done as two separate IF
   statements, the second test could never be true once indic_acc had been
   set to 1, and such sectors were reported with a frequency of 100. */
data freq_secteur;
    set freq_secteur;
    if indic_acc = 0 and percent = 100 then do;
        indic_acc = 1;
        percent = 0;
        count = 0;
    end;
    if indic_acc = 0 then delete;
run;

/* Attach the sector weights; keep every sector present in poids_secteur. */
data freq_secteur2;
    merge freq_secteur poids_secteur (in=in1);
    by d23;
    if in1;
run;
/* Map the first two characters of the sector code (d23) to a section letter s
   and then to a broad group ss.
   NOTE(review): assumes d23 is a character variable starting with a two-digit
   code - confirm against the input data. */
data freq_secteur2;
    set freq_secteur2;
    sect = substr(d23, 1, 2);

    /* Convert once, explicitly, instead of relying on an implicit
       character-to-numeric conversion in every comparison. The ?? modifier
       returns a missing value, without log errors, for non-numeric codes;
       a missing value satisfies none of the ranges below. */
    _sect_num = input(sect, ?? 2.);

/*
Section letter and the range of two-digit codes it covers:
A	1	2
B	5	
C	10	14
D	15	37
E	40	41
F	45	
G	50	52
H	55	
I	60	64
J	65	67
K	70	74
L	75	
M	80	
N	85	
O	90	93
P	95	97
Q	99	
*/
    /* The ranges are disjoint, so an ELSE-IF chain gives the same result as
       independent IF statements; codes outside every range leave s blank. */
    if      _sect_num ge 1  and _sect_num le 2  then s = 'A';
    else if _sect_num eq 5                      then s = 'B';
    else if _sect_num ge 10 and _sect_num le 14 then s = 'C';
    else if _sect_num ge 15 and _sect_num le 37 then s = 'D';
    else if _sect_num ge 40 and _sect_num le 41 then s = 'E';
    else if _sect_num eq 45                     then s = 'F';
    else if _sect_num ge 50 and _sect_num le 52 then s = 'G';
    else if _sect_num eq 55                     then s = 'H';
    else if _sect_num ge 60 and _sect_num le 64 then s = 'I';
    else if _sect_num ge 65 and _sect_num le 67 then s = 'J';
    else if _sect_num ge 70 and _sect_num le 74 then s = 'K';
    else if _sect_num eq 75                     then s = 'L';
    else if _sect_num eq 80                     then s = 'M';
    else if _sect_num eq 85                     then s = 'N';
    else if _sect_num ge 90 and _sect_num le 93 then s = 'O';
    else if _sect_num ge 95 and _sect_num le 97 then s = 'P';
    else if _sect_num eq 99                     then s = 'Q';

    /* Broad groups: sections A-C -> 'A', D-E -> 'I', F -> 'J';
       everything else (including unmapped codes) -> 'S'. */
    ss = 'S';
    if      s in ('A', 'B', 'C') then ss = 'A';
    else if s in ('D', 'E')      then ss = 'I';
    else if s in ('F')           then ss = 'J';

    drop _sect_num;

/*
Alternative mapping, kept disabled:
if sect ge 90 and sect le 93 then s='R';

if sect ge 94 and sect le 96 then s='S';
if sect ge 97 and sect le 98 then s='T';
if sect ge 99 and sect le 99 then s='U';*/
run;

/**********TABLE 2 column 1 ***********/
/* BY-group processing requires the input to be ordered by ss. freq_secteur2
   is ordered by d23, which only matches the ss order when no unmapped sector
   code falls between mapped ones. A copy sorted by ss is therefore used, and
   freq_secteur2 itself is left untouched. */
proc sort data=freq_secteur2 out=freq_secteur2_ss;
    by ss;
run;

/* Distribution across sectors of the agreement frequency (percent), weighted
   by the sector weight poids_sect, within each broad group ss. */
proc univariate data=freq_secteur2_ss;
    var percent;
    weight poids_sect;
    by ss;
    output out=sortie_sect mean=moy median=med q1=q1 q3=q3 p90=p90 n=nb;
run;

/**********TABLE 2 column 2 ***********/
/* dur is the inverse of the mean frequency (moy is in percent). It is left
   missing when moy is zero or missing, which avoids a division-by-zero error
   in the log. */
data sortie_sect;
    set sortie_sect;
    if moy gt 0 then dur = 1 / (moy / 100);
run;
/*
proc univariate data=freq_secteur2;
var percent;
weight poids_sect;
run;*/

/* By size class (t_ents_yp) */
proc sort data=duration.freq_accords_tot_s10 out=test_taille;
    by t_ents_yp;
run;

/* TABLE 2 first column by size: weighted (yp_smic) distribution of indic_acc
   within each size class. */
proc freq data=test_taille noprint;
    by t_ents_yp;
    weight yp_smic;
    tables indic_acc / out=freq_taille;
run;

/* Drop the indic_acc = 0 rows, leaving the frequency of agreements. */
data freq_taille;
    set freq_taille;
    if indic_acc ne 0;
run;


/*************TABLE 6 EFFECTS ********************************/
/* NOTE(review): the libref rev and the data set rev.freq_effet_smic9401_s10
   are not defined in the visible part of this file - confirm where they
   come from. */

/* Sort the table by year and month. */
proc sort data=rev.freq_effet_smic9401_s10 out=test_m;
    by an mois;
run;

/* Weighted (yp_smic) frequency by date (year * month) of:
     indic_acc    - any wage change
     indic_acc_e  - firm-level agreements
     indic_acc_br - industry-level agreements
   Each table is written to its own output data set and not printed. */
proc freq data=test_m;
    by an mois;
    weight yp_smic;
    tables indic_acc    / out=rev.res_f_agree_an_mois noprint;
    tables indic_acc_e  / out=rev.res_f_agree_e_an_mois noprint;
    tables indic_acc_br / out=rev.res_f_agree_br_an_mois noprint;
run;

/* In each output, drop the rows where the dummy equals 0 and rename the
   percentage: f_br (industry-level), f_e (firm-level), f_tot (any). */
data rev.res_f_agree_br_an_mois;
    set rev.res_f_agree_br_an_mois;
    if indic_acc_br ne 0;
    drop count;
    rename percent = f_br;
run;

data rev.res_f_agree_e_an_mois;
    set rev.res_f_agree_e_an_mois;
    if indic_acc_e ne 0;
    drop count;
    rename percent = f_e;
run;

data rev.res_f_agree_an_mois;
    set rev.res_f_agree_an_mois;
    if indic_acc ne 0;
    drop count;
    rename percent = f_tot;
run;

/* Put the three frequencies side by side for each date; the dummies
   themselves are no longer needed.
   NOTE(review): a date on which a dummy is 0 for every observation has no
   remaining row, so its frequency is missing rather than 0 here - confirm
   this is intended. */
data rev.res_f_agree_an_mois_tot;
    merge rev.res_f_agree_an_mois
          rev.res_f_agree_br_an_mois
          rev.res_f_agree_e_an_mois;
    by an mois;
    drop indic_acc_br indic_acc_e indic_acc;
run;

/* Frequency of wage changes by calendar month */
/* TABLE 6 column 3: mean over the years of f_tot, f_br and f_e for each month. */
proc sort data=rev.res_f_agree_an_mois_tot;
    by mois;
run;

proc means data=rev.res_f_agree_an_mois_tot;
    by mois;
    var f_tot f_br f_e;
run;




/* One record per firm and date (siren, an, mois), then ordered by month.
   NOTE(review): rev.freq_acc_smic9401_s10 is not created in the visible part
   of this file - confirm where it comes from. */
proc sort data=rev.freq_acc_smic9401_s10 nodupkeys out=test_m_firm;
    by siren an mois;
run;

proc sort data=test_m_firm;
    by mois;
run;

/* Weighted (yp_smic) proportions by month, each written to its own output
   data set and not printed:
     TABLE 6 column 2 - firm-level wage agreement effect (indic_acc_e)
     TABLE 6 column 1 - industry-level wage agreement effect (indic_acc_br) */
proc freq data=test_m_firm;
    by mois;
    weight yp_smic;
    tables indic_acc_e  / out=rev.res_f_agree_mois_firm noprint;
    tables indic_acc_br / out=rev.res_f_agree_mois_industry noprint;
run;
